{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "*** Introductory Examples for the NLTK Book ***\nLoading text1, ..., text9 and sent1, ..., sent9\nType the name of the text or sentence to view it.\nType: 'texts()' or 'sents()' to list the materials.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "text1: Moby Dick by Herman Melville 1851\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "text2: Sense and Sensibility by Jane Austen 1811\ntext3: The Book of Genesis\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "text4: Inaugural Address Corpus\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "text5: Chat Corpus\ntext6: Monty Python and the Holy Grail\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "text7: Wall Street Journal\ntext8: Personals Corpus\ntext9: The Man Who Was Thursday by G . K . Chesterton 1908\n"
     ]
    }
   ],
   "source": [
    "from nltk.book import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Displaying 18 of 18 matches:\nsending me to bed supperless ,-- my mother dragged me by the legs out of the c\ngnorant whim of his crazy , widowed mother , who died when he was only a twelv\ner counterpane -- God pity his poor mother !-- it will be the ruin of my house\neg here , and all of us , and every mother ' s son and soul of us belong ; the\ny well be regarded as that Egyptian mother , who bore offspring themselves pre\nhere . The whale - ship is the true mother of that now mighty colony . Moreove\narp knife from his girdle ; \" every mother ' s son of ye draw his knife , and \n about a foot across . Probably the mother during an important interval was sa\ne cut by the hunter ' s lance , the mother ' s pouring milk and blood rivallin\nught to relieve my old bed - ridden mother by part of my share of this whale .\n' t ,\" said the captain , \" but his mother did ; he was born with it . Oh , yo\nnd all his Sons -- how many , their mother only knows -- and under their immed\n cinders ; the house was sold ; the mother dived down into the long church - y\nlew close to Ahab . \" Are these thy Mother Carey ' s chickens , Perth ? they a\n art but my fiery father ; my sweet mother , I know not . Oh , cruel ! what ha\n stroke and caress him ; the step - mother world , so long cruel -- forbidding\nly wakes ; sits up in bed ; and his mother tells him of me , of cannibal old m\n grow . Oh , Stubb , I hope my poor mother ' s drawn my part - pay ere this ; \nNone\n"
     ]
    }
   ],
   "source": [
    "# Search the text: show every occurrence of a word together with its context.\n",
    "# concordance() prints its matches itself and returns None, so it is called\n",
    "# directly; wrapping it in print() would append a stray \"None\" to the output.\n",
    "text1.concordance(\"mother\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/var/pyenv/versions/2.7.12/lib/python2.7/site-packages/nltk/draw/__init__.py:15: UserWarning: nltk.draw package not loaded (please install Tkinter library).\n  warnings.warn(\"nltk.draw package not loaded \"\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAEWCAYAAACqitpwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAGodJREFUeJzt3WmYbFV97/HvT3CIoownBhU4TlHEgUCrQTEejXOI0atG\nDd6AkSAaTTCi0YuRwwsTB5KoMYkaozjggDhcJBrxkoveYEQOM6goCDhgFIIoIA7A/77Yq6Voqrqr\n+6w+3efw/TxPPV219t5r/WtVdf16711dlapCkqSebrPSBUiStjyGiySpO8NFktSd4SJJ6s5wkSR1\nZ7hIkrozXLTFSvKZJAdsZB8HJvmPjezj/CTrNqaPnnrMyxLGXJ/kA5tyTK0sw0WrQpJLkjyuZ59V\n9eSqem/PPkclWZukklzTLt9PckKSx8+pY4+qOnm56lis5ZqXJEcn+XmbiyuTfC7J/ZfQT/fngjY9\nw0XaeNtV1TbAQ4DPAZ9IcuBKFZNk65UaG3hjm4t7AD8Ajl7BWrSCDBetekn2S3JWkquSfDHJg1v7\nvdtfyHu123dLcsXsIagkJyc5aKSfP07y1SRXJ/nKyHavSnLRSPvTl1JnVf1XVb0FWA+8IcltWv+/\n/Es8ycOSbEjy47an87etfXYv6OAklyX5XpKXj9R+m5E6/zvJsUl2mLPtC5J8C/j3JHdI8oG27lVJ\nTkty17nz0vp9TZJLk/wgyfuSbDun3wOSfKvN7eFTzsVPgA8CDxy3PMlT2+HCq1o9u7f29wO7Ap9q\ne0CvXOzjoNXBcNGq1gLg3cALgR2BdwDHJ7l9VV0E/AVwTJI7Au8Bjh53CCrJsxhe9P8QuAvwVOC/\n2+KLgEcB2wJHAh9IsvNGlP1x4FeB+41Z9hbgLVV1F+DewLFzlj8GuC/wBOBVI4eH/hR4GvBo4G7A\nD4F/mLPto4HdgScCB7T7swvDvB0CXDemngPb5THAvYBtgLfNWWffdl9+G3jtbBDMJ8k2wP7AmWOW\n/TrwIeBQYA3waYYwuV1V/U/gW8DvVtU2VfXGhcbS6mS4aLX7Y+AdVXVqVd3QzhX8DPhNgKr6Z+Ab\nwKnAzsCkv6wPYjhkc1oNLqyqS1sfH62qy6rqxqr6SOvvYRtR82Xt5w5jlv0CuE+Snarqmqr60pzl\nR1bVtVV1LkNYPre1vxA4vKq+U1U/YwjKZ845BLa+bXtdG2dH4D5t3k6vqh+PqWd/4G+r6ptVdQ3w\nauA5c/o9sqquq6qzgbMZDv9NcliSq4ALGYLqwDHrPBv416r6XFX9AjgK+BXgEfP0q82M4aLVbjfg\n5e3wyVXthWsXhr/eZ/0zw+GXv28vvOPswrCHcgtJ/nDksNtVra+dNqLmu7efV45Z9gLg14GvtUNV\n+81Z/u2R65dy0/3cjeFczmyNXwVuAO46Ydv3A58FPtwOs70xyW3H1HO3Ns7omFvP6fe/Rq7/hCE0\nJjmqqrarql+rqqe2vct5x6yqG1vtdx+zrjZThotWu28Dr2svWLOXO1bVh+CXh1/eDPwLsH72PMSE\nfu49tzHJbgzh9BJgx6raDjgPyEbU/HSGk9kXzF1QVd+oqucyHDZ7A3BckjuNrLLLyPVduWkv6NvA\nk+fMwx2q6ruj3Y+M84uqOrKqHsCwR7AfwyHBuS5jCK7RMa8Hvj/lfV2Km42ZJAz3e/a++FHtWwDD\nRavJbduJ6NnL1gwv/IckeXgGd0ryO0nu3LZ5C3B6VR0E/Cvw9gl9v4vhkM3erZ/7tGC5E8OL2eUA\nSZ7PhJPQC0ly1yQvAY4AXt3+Ip+7zvOSrGnLrmrNN4ys8pdJ7phkD+D5wEda+9uB17WaSbImye/N\nU8tjkjwoyVbAjxkOk90wZtUPAS9Lcs8W1H8FfKSqrl/MfV+kY4HfSfLbbW/q5QyHOr/Yln+f4fyP\nNmOGi1aTTzOcdJ69rK+qDQznXd7GcB
L7Qtpx/Pbi+iSGk9UAfw7slWT/uR1X1UeB1zG8g+lq4JPA\nDlX1FeBvgP9keFF7EHDKIuu+Ksm1wLnAU4BnVdW7J6z7JOD8JNcwBONzquqnI8s/3+7jSQyHmE5s\n7W8BjgdOTHI18CXg4fPU9GvAcQzB8tXW77h/Ynw3wyG0LwAXAz8FXjr/3d04VXUB8Dzg74ErgN9l\nOIH/87bKXwOvaYcAD1vOWrR84peFSSsvyVqGF/fbLvNeg7RJuOciSerOcJEkdedhMUlSd+65SJK6\nW8kPuFtRO+20U61du3aly5Ckzcrpp59+RVWtWWi9W224rF27lg0bNqx0GZK0WUly6cJreVhMkrQM\nDBdJUneGiySpO8NFktSd4SJJ6s5wkSR1Z7hIkrozXCRJ3RkukqTuDBdJUneGiySpO8NFktSd4SJJ\n6s5wkSR1Z7hIkrozXCRJ3RkukqTuDBdJUneGiySpO8NFktSd4SJJ6s5wkSR1Z7hIkrozXCRJ3Rku\nkqTuDBdJUneGiySpO8NFktSd4SJJ6s5wkSR1Z7hIkrozXCRJ3RkukqTuDBdJUneGiySpO8NFktSd\n4SJJ6s5wkSR1Z7hIkrozXCRJ3RkukqTuDBdJUneGiySpO8NFktSd4SJJ6s5wkSR1Z7hIkrozXCRJ\n3RkukqTuDBdJUnebTbgkHJLwh+36gQl3W+maVoP165dn3VuTW9u8bM73t2ft8/W1qcZZzm03ZZ/j\npKo2zUgdJZwMHFbFhqX2MTMzUxs2LHnzVSOBaR/Cxax7a3Jrm5fN+f72rH2+vjbVOMu57XL1meT0\nqppZaL2tlz7E8mp7KYcBBZwDXARcA1wCzADHJFwHHA4cVMXT23aPB15Uxf9YibolSav0sFjCHgyh\n8dgqHgL82eyyKo4DNgD7V7En8Glg94Q1bZXnA+8Z328OTrIhyYbLL798We+DJN2arcpwAR4LHFfF\nFQBVXDlpxSoKeD/wvITtgH2Az4xft95ZVTNVNbNmzZpxq0iSOlith8XCcDhsWu8BPgX8FPhoFdcv\nS1WSpKms1j2Xk4DfT9gRIGGHOcuvBu48e6OKy4DLgNcAR2+iGleFI45YnnVvTW5t87I539+etc/X\n16YaZzm33ZR9jrNq3y2WcADwCuAG4EyGE/nXVHFUwjOAvwKuA/ap4rqE5wCHVvGb0/S/pbxbTJI2\npc3+3WJVvBd474RlHwM+Nqd5X+Cfl7suSdLCVm24LEbC6cC1wMtXuhZJ0hYSLlXsvdI1SJJuslpP\n6EuSNmOGiySpO8NFktSd4SJJ6s5wkSR1Z7hIkrozXCRJ3RkukqTuDBdJUneGiySpO8NFktSd4SJJ\n6s5wkSR1Z7hIkrozXCRJ3RkukqTuDBdJUneGiySpO8NFktSd4SJJ6s5wkSR1Z7hIkrozXCRJ3Rku\nkqTuDBdJUneGiySpO8NFktSd4SJJ6s5wkSR1Z7hIkrozXCRJ3RkukqTuDBdJUneGiySpO8NFktSd\n4SJJ6s5wkSR1Z7hIkrozXCRJ3RkukqTuDBdJUneGiySpO8NFktTdqgqXhHUJjxi5fXTCM1eyJk1v\n/fqVrmD1cm62DJMeRx/fW0pVrXQNv5SwHrimiqPa7aOBE6o4bgl9BUgVN45bPjMzUxs2bNiIajVX\nAqvo6bSqODdbhkmP463p8U1yelXNLLRe9z2XhLUJX0t4V8J5CcckPC7hlIRvJDwsYYeETyack/Cl\nhAcnrAUOAV6WcFbCo1qXv5XwxYRvju7FJLwi4bTWx5EjY3814R+BM4Bdet8/SdLCtl6mfu8DPAs4\nGDgN+ANgX+CpwP8Cvg2cWcXTEh4LvK+KPRPezs33XF4A7Ny2vT9wPHBcwhOA+wIPAwIcn/BbwLeA\n+wHPr+LFc4tKcnCriV133XWZ7rokabnOuVxcxbntkNT5wElVFHAusJYhLN4PUMW/AzsmbDuhr09W\ncW
MVXwHu2tqe0C5nMuyh3J8hbAAureJL4zqqqndW1UxVzaxZs2aj76Qkabzl2nP52cj1G0du39jG\nvH7MNpOOWI72lZGff13FO0ZXbIfWrl1ssZKkvlbq3WJfAPaH4R1iwBVV/Bi4GrjzFNt/FvijhG1a\nH3dP+NVlqlVTOuKIla5g9XJutgyTHkcf31tarj2XhawH3pNwDvAT4IDW/imGcyq/B7x00sZVnJiw\nO/CfGfZlrgGeB9ywjDVrAb4dczLnZsvgW5Gnt6reirwp+VZkSVq8FXsrsiRJhoskqTvDRZLUneEi\nSerOcJEkdWe4SJK6M1wkSd0ZLpKk7gwXSVJ3hoskqTvDRZLUneEiSerOcJEkdWe4SJK6M1wkSd0Z\nLpKk7gwXSVJ3hoskqTvDRZLUneEiSerOcJEkdWe4SJK6M1wkSd0ZLpKk7gwXSVJ3hoskqTvDRZLU\nneEiSerOcJEkdWe4SJK6M1wkSd0ZLpKk7gwXSVJ3hoskqTvDRZLUneEiSerOcJEkdWe4SJK6M1wk\nSd0ZLpKk7gwXSVJ3hoskqTvDRZLUneEiSerOcJEkdTdVuCQ8PaES7r8cRSTMJLx1OfpeDuvXD5fR\n2+Pa5y6f1Nc04/U2WvO0Y61bN13f223Xp+ZJNY6rZdJ44x6nacdYqK6F2teuHd//unU31T+3vtll\no+vM3Wa77W6+zeh24/oc7WO+mteuveXyueOM62f9+ptqmq1lPuO2Hzf2fL9jc+d13LgLPfbjahmt\nZ3Q+F/p9nzQ349af77GavT07H6P3d3SMdeuGdUafY6MmPQaTXqOWQ6pq4ZXCscDOwElVrO9aQNi6\niut79jmNmZmZ2rBhw5K2TYafs1OXDNfnto+uP2ma51u2mHUWa7Tm0b43ttbZ9WDja55U47haJtU2\n2j5fP4uZ42nGmr0Nt+x/ofZRi91m3PJp7v9892u+8cctG617Ul+Ttl+ottH7O1rLuD6muW+T5mTu\nGKN9jxtzmufqNI/VpPs1rp657Qvdr7ljLFWS06tqZqH1FtxzSdgGeCTwAuA5rW1dwucTjk34esLr\nE/ZP+HLCuQn3buutSfhYwmnt8sjWvj7hnQknAu9r/Z0wO17Ce1o/5yQ8o7X/U8KGhPMTjhyp75KE\nIxPOaNssy96VJGl6W0+xztOAf6vi6wlXJuzV2h8C7A5cCXwTeFcVD0v4M+ClwKHAW4C/q+I/EnYF\nPtu2Adgb2LeK6xLWjYz3l8CPqngQQML2rf3wKq5M2Ao4KeHBVZzTll1RxV4JLwYOAw4ad0eSHAwc\nDLDrrrtOcdclSUsxzTmX5wIfbtc/3G4DnFbF96r4GXARcGJrPxdY264/DnhbwlnA8cBdEu7clh1f\nxXVjxnsc8A+zN6r4Ybv6+wlnAGcCewAPGNnm4+3n6SNj30JVvbOqZqpqZs2aNZPvsSRpo8y755Kw\nI/BY4IEJBWwFFPBp4Gcjq944cvvGkX5vA+wzN0Tacb9rJw3bxhhd/54MeyQPreKHCUcDdxhZZXbs\nGxa6T5Kk5bfQC/EzgfdV8cLZhoTPA/tO2f+JwEuAN7Vt96zirCm3ObRtsz1wF4Yw+lHCXYEnAydP\nWUN3Rxwx/vbc9knrT7tsMess1qSa5xvr0Y+eru9tt4VDD11aXeNqGVfT3Fqmmftx6yz02C3U53zt\nu+0GBx54y2Wjtc+t7+STx/c9us22295y+9HtJt3ncY/f6PLddhs/9kL9HHEEvPnNN12fdB/m2/7o\no+cfc6Hn67hxF3rsx9UyWs/sO7am+X2fvT7fHC/msZp9LOZ77l9yyfBz9jk2bp1JtWwK875bLOFk\n4PVV/NtI258CLwIuqmK/kfUOq2JDO39yWBX7JezEcIhrd4Yg+0IVhySsB66p4qi2/eg227Rt9mbY\nEzmyio+3vZWHM5zf+RnDYbWjEy4BZqq4ImEGOKrqZudwxtqYd4tJ
0q3VtO8Wm+qtyFsiw0WSFq/b\nW5ElSVosw0WS1J3hIknqznCRJHVnuEiSujNcJEndGS6SpO4MF0lSd4aLJKk7w0WS1J3hIknqznCR\nJHVnuEiSujNcJEndGS6SpO4MF0lSd4aLJKk7w0WS1J3hIknqznCRJHVnuEiSujNcJEndGS6SpO4M\nF0lSd4aLJKk7w0WS1J3hIknqznCRJHVnuEiSujNcJEndGS6SpO4MF0lSd4aLJKk7w0WS1J3hIknq\nznCRJHVnuEiSujNcJEndGS6SpO4MF0lSd4aLJKk7w0WS1J3hIknqznCRJHVnuEiSujNcJEndGS6S\npO4MF0lSd4aLJKm7VNVK17AiklwOXLrIzXYCrliGcnqzzr6ss6/NpU7YfGrdlHXuVlVrFlrpVhsu\nS5FkQ1XNrHQdC7HOvqyzr82lTth8al2NdXpYTJLUneEiSerOcFmcd650AVOyzr6ss6/NpU7YfGpd\ndXV6zkWS1J17LpKk7gwXSVJ3hssUkjwpyQVJLkzyqk005i5J/m+SryY5P8mftfYdknwuyTfaz+1b\ne5K8tdV4TpK9Rvo6oK3/jSQHjLTvneTcts1bk2Qj6t0qyZlJTmi375nk1DbmR5LcrrXfvt2+sC1f\nO9LHq1v7BUmeONLeZf6TbJfkuCRfa/O6z2qczyQva4/5eUk+lOQOq2U+k7w7yQ+SnDfStuxzOGmM\nRdb5pvbYn5PkE0m2W+pcLeXxmLbOkWWHJakkO630fC5JVXmZ5wJsBVwE3Au4HXA28IBNMO7OwF7t\n+p2BrwMPAN4IvKq1vwp4Q7v+FOAzQIDfBE5t7TsA32w/t2/Xt2/Lvgzs07b5DPDkjaj3z4EPAie0\n28cCz2nX3w68qF1/MfD2dv05wEfa9Qe0ub09cM8251v1nH/gvcBB7frtgO1W23wCdwcuBn5lZB4P\nXC3zCfwWsBdw3kjbss/hpDEWWecTgK3b9TeM1LnouVrs47GYOlv7LsBnGf7Re6eVns8l/b717nBL\nu7QH5rMjt18NvHoF6vjfwOOBC4CdW9vOwAXt+juA546sf0Fb/lzgHSPt72htOwNfG2m/2XqLrO0e\nwEnAY4ET2hP5ipFf5F/OYfuF2add37qtl7nzOrter/kH7sLwop057atqPhnC5dvthWLrNp9PXE3z\nCazl5i/ayz6Hk8ZYTJ1zlj0dOGbcHCw0V0t5fi+2TuA44CHAJdwULis6n4u9eFhsYbO/7LO+09o2\nmbZr/RvAqcBdq+p7AO3nr7bVJtU5X/t3xrQvxZuBVwI3tts7AldV1fVj+v5lPW35j9r6i61/se4F\nXA68J8Phu3cluROrbD6r6rvAUcC3gO8xzM/prL75HLUp5nDSGEv1Rwx/yS+lzqU8v6eW5KnAd6vq\n7DmLVvN83oLhsrBxx8032fu3k2wDfAw4tKp+PN+qY9pqCe2LrW8/4AdVdfoUtcy3bFnrZPgrci/g\nn6rqN4BrGQ4HTLJS87k98HsMh2fuBtwJePI8fa/UfE5jVdaW5HDgeuCY2aZF1rOU5/e0td0ROBx4\n7bjFi6xnRV+7DJeFfYfh+OesewCXbYqBk9yWIViOqaqPt+bvJ9m5Ld8Z+MECdc7Xfo8x7Yv1SOCp\nSS4BPsxwaOzNwHZJth7T9y/racu3Ba5cQv2L9R3gO1V1art9HEPYrLb5fBxwcVVdXlW/AD4OPILV\nN5+jNsUcThpjUdrJ7v2A/asdE1pCnVew+MdjWvdm+MPi7PY7dQ/gjCS/toQ6l30+59X7ONuWdmH4\ni/ebDA/47Em9PTbBuAHeB7x5TvubuPmJuDe267/DzU/2fbm178BwrmH7drkY2KEtO62tO3uy7ykb\nWfM6bjqh/1FufsLzxe36n3DzE57Htut7cPOTqt9kOKHabf6B/wfcr11f3+ZyVc0n8HDgfOCOrZ/3\nAi9dTfPJLc+5LPscThpjkXU+
CfgKsGbOeoueq8U+Houpc86yS7jpnMuKzuein8u9O9wSLwzv0vg6\nwztHDt9EY+7LsAt7DnBWuzyF4fjtScA32s/ZJ1GAf2g1ngvMjPT1R8CF7fL8kfYZ4Ly2zdtY4MTj\nFDWv46ZwuRfDO1UubL+It2/td2i3L2zL7zWy/eGtlgsYeadVr/kH9gQ2tDn9ZPtFXHXzCRwJfK31\n9X6GF71VMZ/AhxjOBf2C4S/jF2yKOZw0xiLrvJDh3MTs79PblzpXS3k8pq1zzvJLuClcVmw+l3Lx\n418kSd15zkWS1J3hIknqznCRJHVnuEiSujNcJEndGS7SBEn+LsmhI7c/m+RdI7f/Jsmfb0T/65Mc\nNmHZwe0TfL+W5MtJ9h1Z9qgMn5p8VpJfaZ/2e36SNy1y/LVJ/mCp9UvzMVykyb7I8N/xJLkNsBPD\nP9zNegRwyjQdJdlq2kHbR+q8ENi3qu4PHAJ8sP2XNsD+wFFVtWdVXdfW3auqXjHtGM1awHDRsjBc\npMlOoYULQ6icB1ydZPsktwd2B85s37Pxpgzfv3JukmcDJFmX4Tt5PsjwT28kObx9P8j/Ae43Ydy/\nAF5RVVcAVNUZDP+p/ydJDgJ+H3htkmOSHM/w+WOnJnl2kme1Os5O8oU25latvtPa94C8sI3zeuBR\nbQ/oZT0nTtp64VWkW6equizJ9Ul2ZQiZ/2T4VNl9GD7t9pyq+nmSZzD89/9DGPZuTpt9YQceBjyw\nqi5OsjfDR4L8BsPv3hkMn3g81x5j2jcAB1TVX7ZDZCdU1XEASa6pqj3b9XOBJ1bVd3PTl2G9APhR\nVT20heIpSU5k+NiPw6pqv42bKemWDBdpfrN7L48A/pYhXB7BEC5fbOvsC3yoqm5g+EDAzwMPBX7M\n8PlPF7f1HgV8oqp+AtD2OqYVpvtE21OAo5Mcy/ChlzB8SdaDkzyz3d4WuC/w80WMLy2Kh8Wk+c2e\nd3kQw2GxLzHsuYyeb5nv64yvnXN7moD4CrD3nLa9Wvu8quoQ4DUMn5J7VpIdW30vbedo9qyqe1bV\niVPUIS2Z4SLN7xSGj2i/sqpuqKorGb4eeR+Gw2QAXwCe3c5trGH46tovj+nrC8DT2zu87gz87oQx\n3wi8oQUDSfZk+Krjf1yo2CT3rqpTq+q1DB8NP/t1uS9qX+FAkl9vX5R2NcNXaEvdeVhMmt+5DOdR\nPjinbZvZE+7AJxjC5myGPZNXVtV/Jbn/aEdVdUaSjzB8Iu+lDF8BcAtVdXySuwNfTFIMIfC8at8c\nuIA3Jbkvw97KSa2mcxjeGXZGkjB8I+fTWvv1Sc4Gjq6qv5uif2kqfiqyJKk7D4tJkrozXCRJ3Rku\nkqTuDBdJUneGiySpO8NFktSd4SJJ6u7/A7eRHNoxvf9FAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1111564d0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Lexical dispersion plot: shows where in the text each word occurs.\n",
    "dispersion_words = [\"city\", \"mother\", \"American\"]\n",
    "text4.dispersion_plot(dispersion_words)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "ename": "TypeError",
     "evalue": "generate() takes exactly 2 arguments (1 given)",
     "traceback": [
      "\u001b[0;31m\u001b[0m",
      "\u001b[0;31mTypeError\u001b[0mTraceback (most recent call last)",
      "\u001b[0;32m<ipython-input-10-25931a0d818a>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m#产生一些随机文本\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtext3\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgenerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;31mTypeError\u001b[0m: generate() takes exactly 2 arguments (1 given)"
     ],
     "output_type": "error"
    }
   ],
   "source": [
    "# Generate some random text in the style of text3 (deprecated in some NLTK releases).\n",
    "# NOTE(review): with the NLTK build this notebook was last run on, the bare call\n",
    "# raised \"TypeError: generate() takes exactly 2 arguments (1 given)\". Releases in\n",
    "# which generate() works are expected to print the generated text themselves, so\n",
    "# the call is not wrapped in print() - confirm against the installed NLTK version.\n",
    "# The guard keeps a fresh Restart & Run All from aborting at this cell.\n",
    "try:\n",
    "    text3.generate()\n",
    "except TypeError as err:\n",
    "    print(\"text3.generate() is unavailable in this NLTK version: %s\" % err)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "16.0501972033\n"
     ]
    }
   ],
   "source": [
    "# Count the vocabulary: on average each distinct token is used about 16 times.\n",
    "from __future__ import division\n",
    "token_count = len(text3)\n",
    "vocabulary_size = len(set(text3))\n",
    "print(token_count / vocabulary_size)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<FreqDist with 19317 samples and 260819 outcomes>\n[u'funereal', u'unscientific', u'divinely', u'foul', u'four', u'gag', u'prefix', u'woods', u'clotted', u'Duck', u'hanging', u'plaudits', u'woody', u'Until', u'marching', u'disobeying', u'canes', u'granting', u'advantage', u'Westers', u'insertion', u'DRYDEN', u'formless', u'Untried', u'superficially', u'Western', u'portentous', u'beacon', u'meadows', u'sinking', u'Ding', u'Spurn', u'treasuries', u'churned', u'oceans', u'powders', u'tinkerings', u'tantalizing', u'yellow', u'bolting', u'uncertain', u'stabbed', u'bringing', u'elevations', u'ferreting', u'believers', u'wooded', u'songster', u'uttering', u'scholar']\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEUCAYAAADjt6tGAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xl4VdW5x/HvSyCEeR4iIJM4MIkkIs44VAGtU51tHWpL\nr7V1aG9b7eDcqm1vq/X2tqXObRWHakUFKagUR4QwzyBjIIwJEAhJSPLeP/YOHGMIh3CmJL/P8+TJ\nOevsvc/vAOHNXnvttczdERERiYVGyQ4gIiL1h4qKiIjEjIqKiIjEjIqKiIjEjIqKiIjEjIqKiIjE\njIqKiIjEjIqKiIjEjIqKiIjETONkB0i0jh07eq9evWq17549e2jWrFlsA9XRHKmQQTmUoy7kSIUM\nh5sjJydnq7t3impjd29QX1lZWV5bM2fOrPW+sZQKOVIhg7tyVKUcX5QKOVIhg/vh5QBmepT/x6r7\nS0REYkZFRUREYkZFRUREYkZFRUREYkZFRUREYkZFRUREYkZFRUSknnN3Nu8uS8h7NbibH0VEGgJ3\nZ+GGnUxckMfE+RtZs203OdmltG2eHtf3VVEREakn3J25uTuYOD+PCQvyWJe/Z99rrdONFZt3kd2r\nfVwzqKiIiNRhFRXO7HUFTJi/kXcWbGT99v2FpGPLpowc2IXRAzNpXLA67gUFVFREROqc8gpnxup8\nJs7P452FG9m0s2Tfa11bZzByYFdGDexKdq/2pDUyAHJy1iQkm4qKiEgdUFZewfRV+UyYn8ekhRvZ\nuqt032vd2jZj1MCujBqUyQk92tIoLCTJELeiYmbHAC9FNPUB7gGeD9t7AauBK929wMwMeBwYDRQB\nN7r7rPBYNwA/D4/zkLs/F7ZnAc8CzYAJwO3h5GciInVeaVkFH3++lYnzN/LvRRspKNq777Uj2zdn\n9KBMRg3syuDubQj+C02+uBUVd18KDAEwszRgPfA6cBfwrrs/YmZ3hc9/AowC+oVfJwF/Ak4ys/bA\nvUA24ECOmY1394JwmzHApwRFZSQwMV6fSUQk3krKyvlw+VYmzN/I5EUb2Vm8fyhwn04tGD0wk1GD\nutI/s3XKFJJIier+Ogf43N3XmNnFwIiw/TlgKkFRuRh4PjzT+NTM2ppZZrjtZHfPBzCzycBIM5sK\ntHb3T8L254FLUFERkTqmeG85U5duYeKCPN5dvJldJfsLyTFdWjFqUFdGD8qkX+eWKVlIIiWqqFwN\nvBg+7uLueQDunmdmncP2bsC6iH1yw7aa2nOraf8SMxtDcEZDZmYmOTk5tfoQRUVFtd43llIhRypk\nUA7lqAs5DpShuKyCWXmlfJJbzKy8EorL9/fc927bmOHdMxjeLYPurRsDhRTmFjIr90uHOewcsRb3\nomJm6cBFwN0H27SaNq9F+5cb3ccCYwGys7M9KyvrIFGql5OTQ233jaVUyJEKGZRDOepCjsgMhcV7\neW/JZibO38jUZVsp3luxb7vju7dhVHiNpGeHFnHNEU+JOFMZBcxy903h801mlhmepWQCm8P2XKBH\nxH7dgQ1h+4gq7VPD9u7VbC8ikjJ2lVbwz5xcJi7IY9qyrZSW7y8kQ49sy+hBmZw/oCs92jdPYsrY\nSURRuYb9XV8A44EbgEfC729EtH/PzMYRXKjfERaeScCvzKxduN15wN3unm9mhWY2HJgOXA88Ef+P\nIyJSM3cnZ00Bz3y0mkkLNlPmwe/OZjCsd3tGD+zK+QO7ktkm+WvXx1pci4qZNQe+AnwnovkR4GUz\nuxlYC1wRtk8gGE68gmBI8U0AYfF4EJgRbvdA5UV74Bb2DymeiC7Si0gSFe8t5615eTz78SoWrN8J\nBLP2ntK3A6MGZXL+gC50bpWR3JBxFtei4u5FQIcqbdsIRoNV3daBWw9wnKeBp6tpnwkMjElYEZFa\n2rijmH9MX8ML09eybXdwU2L7FulcO+
xIBrfYwXmnDUtywsTRHfUiIrXg7sxau51nP17NxPl5lFUE\n44QGHNGaG0/pxVePP4KMJmlJH32WaCoqIiKHoKSsnLfn5fHsx6uZl7sDgLRGxgWDMrnx1F5k92yX\n8veSxJOKiohIFDbvLObv09fywvQ1++bdate8CdcMO5KvD+/JEW3r30X32lBRERGpwZx123n2o1W8\nPT+PveENisd2bcVNp/bi4iHdyGiSluSEqUVFRUSkitKyCiYuyOOZj1YzZ912ABoZjBzQlRtP7cVJ\nvds36C6umqioiIiEthSW8ML0tfx9+hq2FAZrlLRp1oSrh/XgG8N70r1d/bhBMZ5UVESkwZuXu51n\nP1rNW/Py9t3xfkyXVtx4ai8uGdKNZunq4oqWioqINEh7yyuYuGAjz360illrgy4uM/hK/y7cdGov\nTu7TQV1ctaCiIiINyrZd+7u4KpfhbZXRmKtP7MH1J/eqN3NwJYuKiog0CAvW7+DZj1czfu4GSsuC\nLq6jOrfkxlN6cdnQbjRP13+HsaA/RRGpt8oqPLxRcRUzVhcAQRfXucd15sZTenPqUeriijUVFRGp\ndwp2l/LCZ2t5etoWtu0JVt1o1bQxV57Yg+tP7hmX9UokoKIiIvXGqq27eerDlbyak7tvAaw+nVpw\n0ym9uHRod1o21X958aY/YRGp09ydz1bl89cPVvHukk14uP7rGUd34owuZXxz1Mk0aqQurkRRURGR\nOmlveQUT5ufx5AermL8+mNgxPa0Rl57QjZtP783RXVqRk5OjgpJgKioiUqfsLN7LuM/W8uxHq9mw\noxgI1i75+vCefGN4Tzq1aprkhA2bioqI1Anr8ot45qPVvDRjLbtLy4Hgesm3TuvDZUM1sWOqUFER\nkZQ2a20BT32wiokL8gjXweKUvh341um9GXF0Z3VvpRgVFRFJOeUVzr8XbuTJD1eRsya4v6RxI+OS\nIUfwzdN6M7BbmyQnlANRURGRlLG7pIyXZ67j6Y9WsS5/DwCtMxpz3fCe3HByL7q2yUhyQjkYFRUR\nSbq8HXt49uPVvDB9LYXFZQAc2b45N5/Wm8uzutNC95fUGfqbEpGkWbB+B09+sJK35uVRFl4wye7Z\njm+d3oev9O9Cmq6X1DkqKiKSUBUVzvtLN/PXD1by6cp8IFhV8YLBmXzrtN6ccGS7JCeUw6GiIiIJ\nsae0nNdm5/LUh6tYuWU3AC2bNuaqE3tw4ymacr6+UFERkbjaXFjM3z9Zw98+XUNB0V4AjmiTwU2n\n9uaqYT1ondEkyQklllRURCQulm4s5KkPV/Kv2Rv2LdE7uHsbvnV6H0YP7ErjtEZJTijxoKIiIjHj\n7ny4Yiv/My2fOZumAcH6Jef178K3Tu/Dib3aaf2Sek5FRUQOm7szddkWHpuynLnrgvXemzVJ44rs\n7tx0am96d9T6JQ1FXIuKmbUFngQGAg58E1gKvAT0AlYDV7p7gQW/vjwOjAaKgBvdfVZ4nBuAn4eH\nfcjdnwvbs4BngWbABOB298qJr0Uk3tydqUu38NiUZczNDWYK7tAinfN7p/Pjy06mbfP0JCeURIv3\nmcrjwDvufrmZpQPNgZ8C77r7I2Z2F3AX8BNgFNAv/DoJ+BNwkpm1B+4FsgkKU46ZjXf3gnCbMcCn\nBEVlJDAxzp9JpMGrrph0bJnOd87oy3XDj2Tx/LkqKA1U3IqKmbUGzgBuBHD3UqDUzC4GRoSbPQdM\nJSgqFwPPh2can5pZWzPLDLed7O754XEnAyPNbCrQ2t0/CdufBy5BRUUkbtyDe0wem7KcedUUk+bp\n6lFv6OL5L6APsAV4xsyOB3KA24Eu7p4H4O55ZtY53L4bsC5i/9ywrab23Grav8TMxhCc0ZCZmUlO\nTk6tPlBRUVGt942lVMiRChmUI3E53J2cvBJeXrSLzwuCaVTaNG3EJce24Pw+zWnauIDF8wvinuNQ\npU
KOVMiQyBzxLCqNgaHA9919upk9TtDVdSDVDQnxWrR/udF9LDAWIDs727OysmrKfUA5OTnUdt9Y\nSoUcqZBBOeKfw915b0lwZlK5umLHlk35rzP7cN1JPWmWXv0aJvX1z6OuZkhkjngWlVwg192nh89f\nJSgqm8wsMzxLyQQ2R2zfI2L/7sCGsH1ElfapYXv3arYXkcPk7ry7eDOPv3toxUQkbkXF3Tea2Toz\nO8bdlwLnAIvCrxuAR8Lvb4S7jAe+Z2bjCC7U7wgLzyTgV2ZWOSHQecDd7p5vZoVmNhyYDlwPPBGv\nzyPSEFQWk8feXcaC9TuBoJjcMqIv1w47UsVEDireV9W+D/wjHPm1ErgJaAS8bGY3A2uBK8JtJxAM\nJ15BMKT4JoCweDwIzAi3e6Dyoj1wC/uHFE9EF+lFasXdmbJ4M49HFJNOrZryX2eqmMihiWtRcfc5\nBEOBqzqnmm0duPUAx3kaeLqa9pkE98CISC24O5MXbeLxd5ezcMP+YnLLmX259qQjte67HDKN/xNp\ngKorJp0rz0xUTOQwqKiINCDuzr8XbeLxKctZlLe/mNwyoi/XDFMxkcOnoiLSAByomHx3RF+uVjGR\nGDpoUTGzFsAed68ws6OBY4GJ7r437ulE5LBUVITF5N3lLFYxkQSI5kxlGnB6OKT3XWAmcBVwXTyD\niUjtVVdMurRuyndHHMVVJ/ZQMZG4iaaomLsXhUOAn3D3X5vZ7HgHE5FDV+HOOwvyeGzKcpZsLASg\na+sMbhnRV8VEEiKqomJmJxOcmdx8CPuJSIIEZyYbeXjyNtbs2AQExeS7Z/XlymwVE0mcaIrD7cDd\nwOvuvtDM+gDvxzeWiESjsphUPTO59ay+XKFiIkkQTVHp4u4XVT5x95Vm9kEcM4nIQVR3zaRr6wwu\n7NuEH33tVJo2VjGR5IimqNwNvBJFm4jEWXVDgyu7ua46sQcL5s5RQZGkOmBRMbNRBHNxdTOzP0S8\n1Booi3cwEdmvumKi0VySimo6U9lAMHz4IoIFtioVAnfGM5SIBA40nYruM5FUdcCi4u5zgblm9oJu\ndBRJrMpZgx+bskzFROqUaK6pDDOz+4Ce4fZGMKlwn3gGE2mIqlvPpFNYTDQ3l9QF0RSVpwi6u3KA\n8vjGEWmYqlu2V1PQS10UTVHZ4e5a/EokDtyd95cGxWRe7v5le28Z0ZfrVEykDoqmqLxvZr8BXgNK\nKhvdfVbcUonUc+7O1KVbeGzKMubmag14qT+iKSonhd8jV3B04OzYxxGp39ydqcu28NiU5cxdtx2A\nji3T+a8z+6qYSL1w0KLi7mclIohIfXagYvKdM/ry9eEqJlJ/RLOeyj3Vtbv7A7GPI1K/uDv/CYvJ\nnLCYdGiRznfO7MPXh/ekebrmZpX6JZp/0bsjHmcAFwKL4xNHpH5wd6Yt38pjU5Yxe21QTNq3SOc7\nZ/ThGyermEj9FU331/9EPjez3wLj45ZIpA5zdz4Ii8msKsXk68N70qKpionUb7X5F94c0I2PIhGC\nYhJ0c+WsKQCCYjLmjD58Q8VEGpBorqnMJxjtBZAGdAJ0PUUkNH3lNu59P58l24LFsdo1b8KYM/py\n/ckqJtLwRPMv/sKIx2XAJnfXLMXS4BUW7+XhiUt4YfpaANo2b8KYM/pw/cm9aKliIg1UNNdU1pjZ\n8cDpYdM0YF5cU4mkuGnLtnDXP+exYUcxTdKMS49pzj1XnaZiIg1eNN1ftwPfJrijHuAfZjbW3Z+I\nazKRFLSzeC+/fGsxL81cB8Dg7m34zeXHs2v9MhUUEaLr/roZOMnddwOY2aPAJ4CKijQo7y/ZzN2v\nzWfjzmLS0xpxx1f6Meb0PjROa0TO+mSnE0kNjaLYxvji7MTlYdvBdzRbbWbzzWyOmc0M29qb2WQz\nWx5+bxe2m5n9wcxWmNk8MxsacZwbwu2Xm9kNEe1Z4fFXhPtGlUvk
UOwo2ssPX57LTc/OYOPOYob0\naMuE20/juyOOonFaND9CIg1HNGcqzwDTzez18PklBNPhR+ssd98a8fwu4F13f8TM7gqf/wQYBfQL\nv04C/gScZGbtgXsJ5h5zIMfMxrt7QbjNGOBTYAIwEtCMyhIzUxZt4qevz2dzYQlNGzfih+cdzc2n\n9SGtkX5/EalONBfqf2dmU4HTCM5QbnL32YfxnhcDI8LHzwFTCYrKxcDz7u7Ap2bW1swyw20nu3s+\ngJlNBkaGmVq7+ydh+/MEBU9FRQ5bwe5S7n9zIf+aswGArJ7t+PXlg+nbqWWSk4mkNgv+D6/mBbMT\ngY5V11Ixs4uA9e6eU+2OX9x2FVBAcIbxF3cfa2bb3b1txDYF7t7OzN4CHnH3D8P2dwmKzQggw90f\nCtt/AewhKEaPuPu5YfvpwE/cPXIIdOV7jCE4oyEzMzPrzTffPFj0ahUVFdG8efNa7RtLqZAjFTLE\nK8f09cWMzdnJ9pIK0tPguoGtGNWvOWk19K7W5z8P5aj7GQ43R3Z2do67Zx98y5rPVH4D3FhN+yJg\nLNFNfX+qu28ws87AZDNbUsO21f3Eei3av9zoPpYgM9nZ2Z6VlVVz6gPIycmhtvvGUirkSIUMsc6x\nbVcJ945fyFvzgulVhvVqz68vH0yvji0SmuNwKEfq5UiFDInMUVNR6eDuq6s2uvsKM+sQzcHdfUP4\nfXN4TWYYsMnMMt09L+ze2hxungv0iNi9O7AhbB9RpX1q2N69mu1FDtnb8/K4540FbNtdSrMmadw1\n6li+MbwnjXTtROSQ1DR0pVkNrx30Vzcza2FmrSofA+cBCwgmo6wcwXUD8Eb4eDxwfTgKbDjBMsZ5\nwCTgPDNrF44UOw+YFL5WaGbDw1Ff10ccSyQqW3eV8N1/5HDrC7PYtruUk/t0YNIdZ3DDKb1UUERq\noaYzlSlm9kvg5x5x4cXM7gfei+LYXYDXw1G+jYEX3P0dM5sBvGxmNwNrgSvC7ScAo4EVQBFwE4C7\n55vZg8CMcLsHKi/aA7cAzxIUwInoIr1Eyd0ZP3cD941fSEHRXlqkp3H36OO4dtiRKiYih6GmovJD\n4ElghZnNCduOB2YC3zrYgd19Zbh91fZtwDnVtDtw6wGO9TTwdDXtM4GBB8siEmlzYTE/f30B/14U\nTAB52lEdeeRrg+jeLvkXU0XqugMWlfAO+mvMrA8wIGxeGBYLkTrH3Xl99nruf3MRO/bspWXTxvz8\nguO46sQe6L5ZkdiI5j6VlYAKidRpG3cU87PX5/PukmBcyJlHd+LhywZxRNuaLh2KyKHSDHhSr7k7\nr+Tk8uBbiygsLqNVRmPuubA/l2d119mJSByoqEi9tWH7Hu5+bT7/WbYFgHOO7cwvLx1E1zYZSU4m\nUn9FVVTM7DSgn7s/Y2adgJbuviq+0URqx915acY6Hnp7MbtKymjTrAn3XdSfS4Z009mJSJxFs55K\n5WSOxxBMLtkE+DtwanyjiRy63IIi7n5tPh8sD+YwPa9/Fx66ZCCdW+vsRCQRojlTuRQ4AZgFwV3y\nlTc1iqSKigrnhc/W8vCExewuLadd8ybcf/FAvjo4U2cnIgkUTVEpdXc3M4d9d8eLpIx1+UX8+NV5\nfLJyGwCjB3Xl/osG0qlV0yQnE2l4oikqL5vZX4C2ZvZt4JvAX+MbS+TgKiqcCSt28+Ib0ygqLadD\ni3QeuHggFwzOTHY0kQYrmvtUfmtmXwF2ElxXucfdJ8c9mUgN8nbs4QcvzeWTlYUAfPX4I7jvq/3p\n0FJnJyLJFM2F+juBV1RIJFW8s2Ajd702j+1Fe2nTtBGPXnECIwd2TXYsESG67q/WwCQzywfGAa+6\n+6b4xhL5sqLSMh58azEvfrYWgBHHdOIbR8M5KigiKaOmqe8BcPf73X0AwWSPRwD/MbMpcU8mEmHB\n+h189YkPefGztaSnNeLer/bn
mRtPpG1GWrKjiUiEQ7mjfjOwEdgGdI5PHJEvqqhwnv5oFb9+Zyml\n5RX069ySP1xzAsdltk52NBGpRjTXVG4BrgI6Aa8C33b3RfEOJrK5sJgfvjx3342M3xjek59dcBwZ\nTXR2IpKqojlT6Qnc4e5zDrqlSIy8t2QTP3plHtt2l9KueRN+ffnxfKV/l2THEpGDOGBRMbPW7r4T\n+HX4vH3k6xGrL4rETPHech6esJjnPlkDwKlHdeB3Vw6hi6ZZEakTajpTeQG4EMgBHIic68KBPnHM\nJQ3Q0o2F3PbibJZuKqRJmvHf5x3Dt0/vo+V9ReqQmlZ+vDD83jtxcaQhcnee/2QNv5ywmNKyCvp0\nbMHjV5/AoO5tkh1NRA5RNBfq33X3cw7WJlIb23aV8ONX5+1bkfGq7B7c89X+tGiqpX5E6qKarqlk\nAM2BjmbWjv3dX60J7lcROSzTlm3hh6/MZUthCa0zGvPI1wYzepDm7RKpy2r6dfA7wB0EBSSH/UVl\nJ/DHOOeSeqykrJzfvLOUJz8M1nkb1rs9j101ROvFi9QDNV1TeRx43My+7+5PJDCT1GMrNu/ithdn\nsyhvJ2mNjDvP7cctI44iTRfjReqFaGYpfsLMBgL9gYyI9ufjGUzqF3dn3Ix13P/mQor3VnBk++Y8\ndvUQhh7ZLtnRRCSGol1OeARBUZkAjAI+BFRUJCoFu0u567V5TFoYzEN62QnduP/iAbTKaJLkZCIS\na9EMsbkcOB6Y7e43mVkX4Mn4xpL64uPPt/KDl+aycWcxLZs25qFLBnLJCd2SHUtE4iSaorLH3SvM\nrMzMWhNMLKkbH6VGe8sr+N3kZfz5P5/jDkOPbMvjV59Aj/bNkx1NROIomqIy08zaEiwhnAPsAj6L\nayqp01Zv3c3t42YzN3cHjQy+f04/bjv7KBqnHXSlBRGp46JZT+W77r7d3f8MfAW4wd1vivYNzCzN\nzGab2Vvh895mNt3MlpvZS2aWHrY3DZ+vCF/vFXGMu8P2pWZ2fkT7yLBthZndFf3Hlnhwd17NyeWC\nP3zA3NwddGvbjHFjTuYHXzlaBUWkgajp5sehNb3m7rOifI/bgcUEN00CPAr83t3HmdmfgZuBP4Xf\nC9z9KDO7OtzuKjPrD1wNDCC4Z2aKmR0dHuuPBIUuF5hhZuM1LX9y7Nizl5+9Pp+35uUBcMHgTH51\n6SDaNNPFeJGGpKbur/+p4TUHzj7Ywc2sO3AB8EvgB2Zm4X7Xhps8B9xHUFQuDh9DsG7L/4bbXwyM\nc/cSYJWZrQCGhdutcPeV4XuNC7dVUUmwGavzuWPcHNZv30Pz9DTuv2gAl2d1J/jrE5GGpKabH8+K\nwfEfA34MtAqfdwC2u3tZ+DwXqBwK1A1YF753mZntCLfvBnwacczIfdZVaT8pBpklSmXlFTzx3gqe\neG85FQ6Du7fh8atPoHfHFsmOJiJJEs19KtdX136wmx/N7EJgs7vnmNmIyubqDnWQ1w7UXl0nvVfT\nhpmNAcYAZGZmkpOTU0PyAysqKqr1vrGUCjlWb93FT383haXb9mLApce24KoBGeSvWUL+msTlSIU/\nC+VQjlTPkMgc0Yz+OjHicQZwDjCLg9/8eCpwkZmNDvdrTXDm0tbMGodnK92BDeH2uUAPINfMGgNt\ngPyI9kqR+xyo/QvcfSwwFiA7O9uzsrIOEr16OTk51HbfWEp2jrfmbeAXH8yhqMzp0ropv79yCKcc\n1TEpWZL9Z6EcylEXMiQyRzTTtHw/8rmZtQH+FsV+dwN3h/uMAP7b3a8zs1cIbqgcB9wAvBHuMj58\n/kn4+nvu7mY2HnjBzH5HcKG+H8GQZgP6mVlvYD3BxfzKazUSB8V7y3no7UX8/dO1AJzXvwuPfm0w\n7VqkJzmZiKSK2ixaUUTwH3tt/QQYZ2YPAbOBp8L2p4C/hRfi8wmKBO6+0MxeJrgAXwbc6u7lAG
b2\nPWASkAY87e4LDyOX1GDNtt3c+sIsFqzfSXpaI74xuAU/vzJLF+NF5AuiuabyJvuvVTQimAPs5UN5\nE3efCkwNH69k/+ityG2KgSsOsP8vCUaQVW2fQDAfmcTRxPl5/PjVeRSWlNGjfTP+eO1Q9m76XAVF\nRL4kmjOV30Y8LgPWuHtunPJICikpK+fhCUt49uPVAJw/oAu/vvx42jRrQs6m5GYTkdQUzTWV/wCE\n8341Dh+3d/f8OGeTJFqXX8T3XpjF3NwdNEkz7h51HDed2ktnJyJSo2i6v8YADwJ7gAqCC+SOJpWs\nt/69cCP//cpcdhaX0a1tM/543VCG9Gib7FgiUgdE0/31I2CAu2+NdxhJrtKyCh59ZwlPhcv8nntc\nZ357xfG0ba7RXSISnWiKyucEI76kHsstKOJ7L8xmzrrtNG5k/GTksXzr9N7q7hKRQxJNUbkb+NjM\npgMllY3uflvcUklCvbt4Ez94eS479uzliDYZPHHtULJ6aplfETl00RSVvwDvAfMJrqlIPbG3vILf\nTlrKX6atBOCsYzrxuyuH6GZGEam1aIpKmbv/IO5JJKE2bN/D91+cTc6aAtIaGT86/xjGnN6HRo3U\n3SUitRdNUXk/HAH2Jl/s/tKQ4jrq/aWb+cFLcygo2kvX1hk8ce0JnNirfbJjiUg9EE1RqZxP6+6I\nNg0proPKwnXj/2/q5wCccXQnfn/l8XRo2TTJyUSkvojm5sfeiQgi8bVxRzG3vTibz1bn08jgh+cd\nwy1n9lV3l4jEVNzWU5HUMW3ZFu58aQ7bdpfSuVVT/nDNCQzv0yHZsUSkHorneiqSZOUVzuNTlvHE\n+ytwh9OO6sjvrxpCp1bq7hKR+IjbeiqSXJt3FnPbuNl8ujIfM7jz3KP53tlHkabuLhGJo2SspyJx\n9vGKrdw2bg5bd5XQsWVT/nB18lZmFJGGJSHrqUhilFc4T7y3nMffXY47nNynA49fM4TOrTKSHU1E\nGgitp1JPbCks4c6X5vDhiq2YwW3n9OP2c/qpu0tEEuqARcXMjgK6VK6nEtF+upk1dffP455OovLp\nym18/8XZbCksoUOLdB67egin9+uU7Fgi0gA1quG1x4DCatr3hK9JklVUOP/73nKu/eunbCksYVjv\n9ky4/XQVFBFJmpq6v3q5+7yqje4+08x6xS2RRGXbrhLufHku05ZtAeDWs/py57lH0zitpt8TRETi\nq6aiUtPV3WaxDiLRW7y1lFsnfcjGncW0a96E3181hBHHdE52LBGRGovKDDP7trv/NbLRzG4GcuIb\nS6rj7vy/MFETAAARBklEQVT5Pyv5zdR8Khyye7bjiWtPILONaryIpIaaisodwOtmdh37i0g2kA5c\nGu9g8mV//WAlj76zBIDvnNmH/z7vGJqou0tEUsgBi4q7bwJOMbOzgIFh89vu/l5CkskXrNi8i9/+\nexkAd57UhttHHZfkRCIiXxbNNC3vA+8nIIscQHmF8+NX51JaVsHlWd057ciyZEcSEamW+k7qgGc+\nWsWstdvp0ropv7igf7LjiIgckIpKilu5ZRe/mbQUgIcvG0Sb5k2SnEhE5MBUVFJY0O01j5KyCi4b\n2o2zj+2S7EgiIjWKW1Exswwz+8zM5prZQjO7P2zvbWbTzWy5mb1kZulhe9Pw+Yrw9V4Rx7o7bF9q\nZudHtI8M21aY2V3x+izJ8uzHq5m5poDOrZpy74UDkh1HROSg4nmmUgKc7e7HA0OAkWY2HHgU+L27\n9wMKgJvD7W8GCtz9KOD34XaYWX/gamAAMBL4PzNLM7M04I/AKIKZk68Jt60XVm3dzW8mBcOHf3mp\nur1EpG6IW1HxwK7waZPwy4GzgVfD9ueAS8LHF4fPCV8/x8wsbB/n7iXuvgpYAQwLv1a4+0p3LwXG\nhdvWeRXhaK/ivRVcMuQIvtJf3V4iUjfE9ZpKeEYxB9gMTA
Y+B7a7e+WY2FygW/i4G7AOIHx9B9Ah\nsr3KPgdqr/Oe+2Q1M1YX0LFlU+79qrq9RKTuqM3Kj1Fz93JgiJm1BV4Hqrtjr3IBsOoW/vAa2qsr\niF5NG2Y2BhgDkJmZSU5O7WaZKSoqqvW+0dq4q4xH/r0NgG8ObsbKJfOTkuNgUiGDcihHXciRChkS\nmSOuRaWSu283s6nAcKCtmTUOz0a6AxvCzXKBHkCumTUG2gD5Ee2VIvc5UHvV9x8LjAXIzs72rKys\nWn2OnJwcartvNCoqnGv++ikl5c5Fxx/Bdy86ISk5opEKGZRDOepCjlTIkMgc8Rz91Sk8Q8HMmgHn\nAosJ7s6/PNzsBuCN8PH48Dnh6++5u4ftV4ejw3oD/YDPgBlAv3A0WTrBxfzx8fo8ifD36WuYviqf\nji3Tue8idXuJSN0TzzOVTOC5cJRWI+Bld3/LzBYB48zsIWA28FS4/VPA38xsBcEZytUA7r7QzF4G\nFhEsZ3xr2K2GmX0PmASkAU+7+8I4fp64WrutiEcmBqO9HrpkIO1bpCc5kYjIoYtbUQkX+PpS/427\nryQYuVW1vRi44gDH+iXwy2raJwATDjtsklVUOD/+51yKSsu5cHAmIwdmJjuSiEit6I76FPCPz9by\n6cp8OrRI5351e4lIHaaikmTr8ot4eMJiAB68ZCAdWjZNciIRkdpTUUkid+cn/5xHUWk5owd1ZfQg\ndXuJSN2mopJEL3y2lo8/30a75k144OKBB99BRCTFqagkSW5BEb96O+j2euDigXRUt5eI1AMqKkng\n7tz92nx2l5YzckBXLhysbi8RqR9UVJLgpRnr+GD5Vto2b8KDlwwkmDdTRKTuU1FJsPXb9/BQ2O11\n/0UD6NRK3V4iUn+oqCRQZbfXrpIyzuvfhYuOPyLZkUREYkpFJYFemZnLtGVbaNOsCQ9dqm4vEal/\nVFQSJG/HHh58axEQdHt1bpWR5EQiIrGnopIAld1ehSVlnHtcFy4eom4vEamfVFQS4NWcXKYu3ULr\njMb8St1eIlKPqajE2cYdxTwQdnvd+9UBdG6tbi8Rqb9UVOLI3fnp6/MpLC7j7GM7c9nQbsmOJCIS\nVyoqcfT67PW8t2QzrTIa86tLB6nbS0TqPRWVONm8s5j7xgcLUd5zYX+6tlG3l4jUfyoqcVDZ7bWz\nuIwRx3Ti8qzuyY4kIpIQKipx8MacDUxZvJlWTRvz8GXq9hKRhkNFJcY2FxZzb9jt9YsL+5PZplmS\nE4mIJI6KSgy5Oz97fQE79uzljKM7cUW2ur1EpGFRUYmh8XM3MHnRJlo2bcwj6vYSkQZIRSVGthSW\n7Bvt9bMLjuOItur2EpGGR0UlBtydX/xrAQVFezntqI5cfWKPZEcSEUkKFZUYeHt+Hu8s3EiL9DQe\n+Zq6vUSk4VJROUxbd5VwzxtBt9dPLziO7u2aJzmRiEjyqKgcpnvfWEj+7lJO6duBa4cdmew4IiJJ\npaJyGN6el8fb8/Nonp7Go18brG4vEWnw4lZUzKyHmb1vZovNbKGZ3R62tzezyWa2PPzeLmw3M/uD\nma0ws3lmNjTiWDeE2y83sxsi2rPMbH64zx8sgf+rb9tVwj1vLADg7tHH0aO9ur1EROJ5plIG/NDd\njwOGA7eaWX/gLuBdd+8HvBs+BxgF9Au/xgB/gqAIAfcCJwHDgHsrC1G4zZiI/UbG8fN8wb3jF7Jt\ndykn9+nAder2EhEB4lhU3D3P3WeFjwuBxUA34GLguXCz54BLwscXA8974FOgrZllAucDk909390L\ngMnAyPC11u7+ibs78HzEseLqnQV5vDUv6Pb69eWDadRI3V4iIpCgaypm1gs4AZgOdHH3PAgKD9A5\n3KwbsC5it9ywrab23Gra46pgdyk//1fQ7fWTkceq20tEJELjeL+BmbUE/gnc4e47a7jsUd0LXov2\n6jKMIegmIzMzk5ycnI
PFrlZRURG3PfcBW3eV0r9jE/qnbyUnZ1utjnU4ioqKav0Z6lMG5VCOupAj\nFTIkMkdci4qZNSEoKP9w99fC5k1mlunueWEX1uawPReIvBW9O7AhbB9RpX1q2N69mu2/xN3HAmMB\nsrOzPSsrq1af5//Gf8QHa4vJaNKIP910Kj07tKjVcQ5XTk4Otf0M9SmDcihHXciRChkSmSOeo78M\neApY7O6/i3hpPFA5gusG4I2I9uvDUWDDgR1h99gk4DwzaxdeoD8PmBS+Vmhmw8P3uj7iWDG3vaiU\nsTk7gaDbK1kFRUQklcXzTOVU4BvAfDObE7b9FHgEeNnMbgbWAleEr00ARgMrgCLgJgB3zzezB4EZ\n4XYPuHt++PgW4FmgGTAx/IqL+99cxPaSCob1as8NJ/eK19uIiNRpcSsq7v4h1V/3ADinmu0duPUA\nx3oaeLqa9pnAwMOIGZUpizbx+uz1pKfBoxrtJSJyQHG/UF8fHN+jLaMGdqVr2m56d1S3l4jIgWia\nlih0atWUP309iwv6afiwiEhNVFQOgeb2EhGpmYqKiIjEjIqKiIjEjIqKiIjEjIqKiIjEjIqKiIjE\njIqKiIjEjIqKiIjEjAWzozQcZrYFWFPL3TsCW2MYp7ZSIUcqZADlqEo5vigVcqRCBji8HD3dvVM0\nGza4onI4zGymu2crR2pkUA7lqAs5UiFDInOo+0tERGJGRUVERGJGReXQjE12gFAq5EiFDKAcVSnH\nF6VCjlTIAAnKoWsqIiISMzpTERGRmFFRERGRmNHKj4fIzDKBfHcvSXYWkUpm1g7oB2RUtrn7tOQl\nSg4z6+ruGyOeN9ifVzNrWvVzV9cWazpTOXR/A5aY2W+THSRVmFnXBL3P38Lvtyfi/eoKM/sWMA2Y\nBNwffr8vCTm6mNmF4VfnRL9/6Kkqz5P282pmp5jZtWZ2feVXgiN8EmVbTOlM5RC5+7kWLAHZP1Hv\naWZdgF8BR7j7KDPrD5zs7lV/gJLlKeCCBLxPlpn1BL5pZs8DX1iK093zE5ABMysEDjjCxd1bJyJH\nhNuBE4FP3f0sMzuWoLgkjJldCfwGmErw9/KEmf3I3V9NZA53v6DK84T/vMK+X4D6AnOA8so4wPMJ\neO+uQDegmZmdwP6fk9ZA3NdEV1GpBQ+GzC1M4Fs+CzwD/Cx8vgx4iS//VpYUVX+Q4+jPwDtAHyAn\not0IfmD7JCKEu7cCMLMHgI0Evw0bcB3QKhEZqih292Izq+zeWGJmxyQ4w8+AE919M4CZdQKmAAkt\nKtVJws8rQDbQ35MzvPZ84EagO/C7iPZC4KfxfnMNKa4DzGyGu59oZrPd/YSwbY67D0l2tmQwsz8R\nFJgzwqZp7j43CTmmu/tJB2tLQI7XgZuAO4CzgQKgibuPTmCG+e4+KOJ5I2BuZFtDYmavALe5e14S\nM3zN3f+Z6PfVmUrdsNvMOhB2uZjZcGBHciMl1RLg78BrBGcIfzOzv7r7EwnOUW5m1wHjCP5urmF/\nV0fCuPul4cP7zOx9oA3BGV0iTTSzScCL4fOrgAkJzpBKOgKLzOwzYN+FcXe/KFEB3P2fZnYBMIAv\nDuB4IJ7vqzOVOsDMhgJPAAOBBUAn4HJ3n5fUYEliZvMIrintDp+3AD5x98EJztELeBw4laCofATc\n4e6rE5kjFZjZo8B04DSCQj8NGO7uP0lqsCQxszOra3f3/yQww58JrqGcBTwJXA585u43x/V9VVTq\nBjNrDBxD8AO71N33JjlS0pjZfIL+++LweQYwo6F2taQCM5vl7kOrtM1LdKGX/Sr//CO+twRec/fz\n4vm+6v6qO4YBvQj+zoaaGe4e95EkKeoZYHp4LQHgEpIwaCG8GP1t9v+9AODu30x0lmQxs1uA7wJ9\nwjPISq0IztwaFDP70N1Pq2aEoBGMGUjkyMA94fciMzsC2Ab0jveb6kylDjjQ8ER3vy15
qZIr7BLc\n19Xi7rOTkOFj4AOCkWj7rqUk4+JosphZG6Ad8DBwV8RLhYka4i3VM7NfEHSbnwP8kaDIPenuv4jr\n+6qopD4zW0zyhifKATTkEXhSt5hZUyDD3eM+wEfdX3XDAqArkLThiVKtt8xstLs35FFOksLM7BQi\numcT0W2uM5UUZmZvEpyytgKGAEkbnihfFvabtyD4O9lLcvrNRaqVrG5znamktt8S/Ef1KMHF6EqV\nbZJE7t7KzNpTZSJHkRSRlLv6VVRSWOWYdjNrUnV8u5k1S04qqRRO5Hg7wXQYc4DhwMcEF0ZFki0p\n3eYqKilMwzVTXtInchSpqkq3ecLv6ldRSW0vABPRcM1UlQoTOYpUldRucxWVFBYO/9tBMKeUpJ5c\nM2sL/AuYbGYFwIYkZ5IGLtnd5hr9JRID4VxPbYB33L002Xmk4YrsNgc+j3ipFfCRu389ru+voiIi\nUn8ke5YDFRUREYkZrVEvIiIxo6IiIiIxo6IiUktm9jMzW2hm88xsjpnFbRlhM5tqZtnxOr5IrGhI\nsUgtmNnJwIXAUHcvMbOOQHqSY4kknc5URGonE9jq7iUA7r7V3TeY2T1mNsPMFpjZWDMz2Hem8Xsz\nm2Zmi83sRDN7zcyWm9lD4Ta9zGyJmT0Xnv28ambNq76xmZ1nZp+Y2SwzeyVc0Q8ze8TMFoX7/jaB\nfxYi+6ioiNTOv4EeZrbMzP4vYk3y/3X3E919INCM4GymUqm7nwH8GXgDuBUYCNxoZh3CbY4BxobL\n8O4kuN9gn/CM6OfAueHyvTOBH4QTW14KDAj3fSgOn1nkoFRURGrB3XcBWcAYYAvwkpndCJxlZtPN\nbD5wNjAgYrfx4ff5wEJ3zwvPdFYCPcLX1rl75bxufydY3TLScKA/8JGZzQFuAHoSFKBi4Ekzuwwo\nitmHFTkEuqYiUkvuXg5MBaaGReQ7wGAg293Xmdl9fHFK/MpJ/SoiHlc+r/xZrHrjWNXnBkx29y9N\n3WNmwwhmSL4a+B5BURNJKJ2piNSCmR1jZv0imoYAS8PHW8PrHJfX4tBHhoMAIJjz7cMqr38KnGpm\nR4U5mpvZ0eH7tQlXobwjzCOScDpTEamdlsAT4YSSZcAKgq6w7QTdW6uBGbU47mLgBjP7C7Ac+FPk\ni+6+JexmezFcdxyCayyFwBtmlkFwNnNnLd5b5LBpmhaRFGFmvYC3wov8InWSur9ERCRmdKYiIiIx\nozMVERGJGRUVERGJGRUVERGJGRUVERGJGRUVERGJGRUVERGJmf8Hle7x4DK9CP0AAAAASUVORK5C\nYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x10cb29c90>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "#频率分布, FreqDist 寻找《白鲸记》中最常见的 50 个词\n",
    "fdist1 = FreqDist(text1) \n",
    "print(fdist1)\n",
    "vocabulary1 = fdist1.keys()\n",
    "print(vocabulary1[:50])\n",
    "\n",
    "fdist1.plot(10, cumulative=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[u'#14-19teens', u'#talkcity_adults', u'((((((((((', u'........', u'Question', u'actually', u'anything', u'computer', u'cute.-ass', u'everyone', u'football', u'innocent', u'listening', u'remember', u'seriously', u'something', u'together', u'tomorrow', u'watching']\n"
     ]
    }
   ],
   "source": [
    "#聊天语料库中所有长度超过 7 个字符出现次数超过 7 次的词\n",
    "fdist5 = FreqDist(text5)\n",
    "sortItem = sorted([w for w in set(text5) if len(w) > 7 and fdist5[w] > 7])\n",
    "print(sortItem)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "('more', 'is')\n('is', 'said')\n('said', 'than')\n('than', 'done')\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "United States; fellow citizens; four years; years ago; Federal\nGovernment; General Government; American people; Vice President; Old\nWorld; Almighty God; Fellow citizens; Chief Magistrate; Chief Justice;\nGod bless; every citizen; Indian tribes; public debt; one another;\nforeign nations; political parties\n"
     ]
    }
   ],
   "source": [
    "#词语搭配和双连词(bigrams)\n",
    "from nltk import bigrams\n",
    "bg = bigrams(['more', 'is', 'said', 'than', 'done'])\n",
    "for item in bg:\n",
    "    print(item)\n",
    "    \n",
    "#我们希 望找到比我们基于单个词的频率预期得到的更频繁出现的双连词\n",
    "text4.collocations()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 20]\n[(1, 47933), (2, 38513), (3, 50223), (4, 42345), (5, 26597), (6, 17111), (7, 14399), (8, 9966), (9, 6428), (10, 3528), (11, 1873), (12, 1053), (13, 567), (14, 177), (15, 70), (16, 22), (17, 12), (18, 1), (20, 1)]\n0.192558824319\n"
     ]
    }
   ],
   "source": [
    "#计数其他东西\n",
    "\n",
    "#通过创造一长串数字的链表的 FreqDist，其中每个数字是文 中对应词的长度\n",
    "fdist = FreqDist([len(w) for w in text1])\n",
    "print(fdist.keys())\n",
    "print(fdist.items())\n",
    "print(fdist.freq(3))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### NLTK频率分布类中定义的函数\n",
    "* fdist = FreqDist(samples) 创建包 给定样 的频率分布\n",
    "* fdist.inc (sample) 增加样 \n",
    "* fdist['monstrous'] 计数给定样 出现的次数\n",
    "* fdist.freq('monstrous') 给定样 的频率\n",
    "* fdist.N() 样 总数\n",
    "* fdist.keys() 以频率递减顺序排序的样 链表\n",
    "* for sample in fdist: 以频率递减的顺序遍历样 \n",
    "* fdist.max() 数值最大的样 \n",
    "* fdist.tabulate() 绘制频率分布表\n",
    "* fdist.plot() 绘制频率分布图"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 回到 Python:决策与控制\n",
    "\n",
    "1.5 自动理解自然语言\n",
    "> 词义消歧\n",
    "\n",
    "> 指代消解\n",
    "\n",
    "> 自动生成语言\n",
    "\n",
    "> 机器翻译\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    ""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    ""
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2.0
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}